Read in Files

Replace the file name below with the newest COVID daily report to get the most up-to-date information.

# Load the daily-report snapshot; the first row of the file holds column names.
covid_raw <- read.csv(file = "05-21-2020.csv", header = TRUE)
#covid_ts_confirmed = read.csv(file.choose(),header = TRUE)

Look at Raw Data

# Print a per-column summary (level counts, quartiles, NA counts) of the raw data.
summary(covid_raw)
##         Province_State Country_Region              Last_Update      Lat        
##  Alabama       : 1     US:58          2020-05-22 02:36:57:58   Min.   :-14.27  
##  Alaska        : 1                                             1st Qu.: 34.59  
##  American Samoa: 1                                             Median : 39.06  
##  Arizona       : 1                                             Mean   : 36.84  
##  Arkansas      : 1                                             3rd Qu.: 42.36  
##  California    : 1                                             Max.   : 61.37  
##  (Other)       :52                                             NA's   :2       
##      Long_           Confirmed          Deaths        Recovered    
##  Min.   :-170.13   Min.   :     0   Min.   :    0   Min.   :   13  
##  1st Qu.:-101.17   1st Qu.:  2608   1st Qu.:   74   1st Qu.:  999  
##  Median : -87.94   Median : 10404   Median :  398   Median : 3122  
##  Mean   : -85.21   Mean   : 27192   Mean   : 1633   Mean   : 7105  
##  3rd Qu.: -76.97   3rd Qu.: 30109   3rd Qu.: 1422   3rd Qu.: 7327  
##  Max.   : 145.67   Max.   :356458   Max.   :28743   Max.   :62826  
##  NA's   :2                                          NA's   :16     
##      Active              FIPS          Incident_Rate    People_Tested    
##  Min.   :     0.0   Min.   :    1.00   Min.   :   0.0   Min.   :    124  
##  1st Qu.:   874.5   1st Qu.:   18.25   1st Qu.: 150.1   1st Qu.:  47444  
##  Median :  5850.5   Median :   32.50   Median : 254.6   Median : 142791  
##  Mean   : 20414.3   Mean   : 3288.09   Mean   : 403.8   Mean   : 233146  
##  3rd Qu.: 19697.5   3rd Qu.:   47.75   3rd Qu.: 482.2   3rd Qu.: 292469  
##  Max.   :264889.0   Max.   :99999.00   Max.   :1832.4   Max.   :1555055  
##                                        NA's   :2        NA's   :2        
##  People_Hospitalized Mortality_Rate       UID            ISO3   
##  Min.   :   65       Min.   :0.000   Min.   :      16   ASM: 1  
##  1st Qu.:  563       1st Qu.:3.030   1st Qu.:84000012   GUM: 1  
##  Median : 1534       Median :4.394   Median :84000028   MNP: 1  
##  Mean   : 4890       Mean   :4.446   Mean   :76761944   PRI: 1  
##  3rd Qu.: 4289       3rd Qu.:5.461   3rd Qu.:84000042   USA:53  
##  Max.   :76608       Max.   :9.585   Max.   :84099999   VIR: 1  
##  NA's   :24          NA's   :1                                  
##   Testing_Rate     Hospitalization_Rate
##  Min.   :   99.3   Min.   : 6.46       
##  1st Qu.: 2775.4   1st Qu.: 9.86       
##  Median : 3574.1   Median :14.12       
##  Mean   : 4025.5   Mean   :14.29       
##  3rd Qu.: 4981.7   3rd Qu.:17.47       
##  Max.   :11645.4   Max.   :27.92       
##  NA's   :2         NA's   :24
#covid_ts_confirmed

Plot Data

An initial look into plotting the data points.

#install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Base-graphics plot of the whole data frame for a first visual pass over
# every column at once.
plot(covid_raw)

# Deaths against confirmed cases for the first ten rows of the file.
# The fill mapping is declared once on the plot; geom_point() inherits it.
byState10 <- ggplot(
  data = head(covid_raw, 10),
  mapping = aes(x = Deaths, y = Confirmed, fill = Province_State)
) +
  geom_point() +
  ggtitle("Deaths vs Confirmed Cases in First Ten Provinces")

ggplotly(byState10)

# The same view across every row of the file.
byState <- ggplot(
  data = covid_raw,
  mapping = aes(x = Deaths, y = Confirmed, fill = Province_State)
) +
  geom_point() +
  ggtitle("Deaths vs Confirmed Cases per Province")
ggplotly(byState)
# Same scatter with the largest outliers removed so the remaining states are
# easier to compare. The title now states the exclusion; previously it was
# identical to the unfiltered plot's title, so the two charts could not be
# told apart.
byStateNoNY <- ggplot(
  data = dplyr::filter(
    covid_raw,
    Province_State != "New York",
    Province_State != "New Jersey",
    Province_State != "Hubei"
  ),
  mapping = aes(x = Deaths, y = Confirmed, fill = Province_State)
) +
  geom_point() +
  ggtitle(
    "Deaths vs Confirmed Cases per Province (Minus New York, New Jersey and Hubei)"
  )
ggplotly(byStateNoNY)
#covid_raw %>% head(10) %>% ggplot(aes(x=Deaths, y=Confirmed, fill=Province_State) + geom_point(aes(fill=Province_State)) + ggtitle("Deaths vs Confirmed Cases in Top Countries (Minus China)")
# Split the states at 700 deaths so the low-count group is not flattened by
# the high-count group's axis range.
statesAbove700Deaths <- ggplot(
  data = dplyr::filter(covid_raw, Deaths > 700),
  mapping = aes(x = Deaths, y = Confirmed, fill = Province_State)
) +
  geom_point() +
  ggtitle("Over 700 Mortality States")
ggplotly(statesAbove700Deaths)

# The lower group gets its own variable (it used to overwrite
# statesAbove700Deaths) and uses <= so a state with exactly 700 deaths is not
# dropped from both plots.
statesAtOrUnder700Deaths <- ggplot(
  data = dplyr::filter(covid_raw, Deaths <= 700),
  mapping = aes(x = Deaths, y = Confirmed, fill = Province_State)
) +
  geom_point() +
  ggtitle("700 or Under Mortality States")
ggplotly(statesAtOrUnder700Deaths)

Map data using FIPS codes

library(e1071)
library(usmap)

# Keep US rows that carry a FIPS code. Missing values are tested with
# is.na(): the previous `FIPS != 'NA'` compared against the literal string
# "NA" and only excluded missing codes because filter() drops rows whose
# condition evaluates to NA.
UScovid_dataset <- dplyr::filter(
  covid_raw,
  Country_Region == "US",
  !is.na(FIPS)
)
UScovid_dataset
##              Province_State Country_Region         Last_Update      Lat
## 1                   Alabama             US 2020-05-22 02:36:57  32.3182
## 2                    Alaska             US 2020-05-22 02:36:57  61.3707
## 3            American Samoa             US 2020-05-22 02:36:57 -14.2710
## 4                   Arizona             US 2020-05-22 02:36:57  33.7298
## 5                  Arkansas             US 2020-05-22 02:36:57  34.9697
## 6                California             US 2020-05-22 02:36:57  36.1162
## 7                  Colorado             US 2020-05-22 02:36:57  39.0598
## 8               Connecticut             US 2020-05-22 02:36:57  41.5978
## 9                  Delaware             US 2020-05-22 02:36:57  39.3185
## 10         Diamond Princess             US 2020-05-22 02:36:57       NA
## 11     District of Columbia             US 2020-05-22 02:36:57  38.8974
## 12                  Florida             US 2020-05-22 02:36:57  27.7663
## 13                  Georgia             US 2020-05-22 02:36:57  33.0406
## 14           Grand Princess             US 2020-05-22 02:36:57       NA
## 15                     Guam             US 2020-05-22 02:36:57  13.4443
## 16                   Hawaii             US 2020-05-22 02:36:57  21.0943
## 17                    Idaho             US 2020-05-22 02:36:57  44.2405
## 18                 Illinois             US 2020-05-22 02:36:57  40.3495
## 19                  Indiana             US 2020-05-22 02:36:57  39.8494
## 20                     Iowa             US 2020-05-22 02:36:57  42.0115
## 21                   Kansas             US 2020-05-22 02:36:57  38.5266
## 22                 Kentucky             US 2020-05-22 02:36:57  37.6681
## 23                Louisiana             US 2020-05-22 02:36:57  31.1695
## 24                    Maine             US 2020-05-22 02:36:57  44.6939
## 25                 Maryland             US 2020-05-22 02:36:57  39.0639
## 26            Massachusetts             US 2020-05-22 02:36:57  42.2302
## 27                 Michigan             US 2020-05-22 02:36:57  43.3266
## 28                Minnesota             US 2020-05-22 02:36:57  45.6945
## 29              Mississippi             US 2020-05-22 02:36:57  32.7416
## 30                 Missouri             US 2020-05-22 02:36:57  38.4561
## 31                  Montana             US 2020-05-22 02:36:57  46.9219
## 32                 Nebraska             US 2020-05-22 02:36:57  41.1254
## 33                   Nevada             US 2020-05-22 02:36:57  38.3135
## 34            New Hampshire             US 2020-05-22 02:36:57  43.4525
## 35               New Jersey             US 2020-05-22 02:36:57  40.2989
## 36               New Mexico             US 2020-05-22 02:36:57  34.8405
## 37                 New York             US 2020-05-22 02:36:57  42.1657
## 38           North Carolina             US 2020-05-22 02:36:57  35.6301
## 39             North Dakota             US 2020-05-22 02:36:57  47.5289
## 40 Northern Mariana Islands             US 2020-05-22 02:36:57  15.0979
## 41                     Ohio             US 2020-05-22 02:36:57  40.3888
## 42                 Oklahoma             US 2020-05-22 02:36:57  35.5653
## 43                   Oregon             US 2020-05-22 02:36:57  44.5720
## 44             Pennsylvania             US 2020-05-22 02:36:57  40.5908
## 45              Puerto Rico             US 2020-05-22 02:36:57  18.2208
## 46             Rhode Island             US 2020-05-22 02:36:57  41.6809
## 47           South Carolina             US 2020-05-22 02:36:57  33.8569
## 48             South Dakota             US 2020-05-22 02:36:57  44.2998
## 49                Tennessee             US 2020-05-22 02:36:57  35.7478
## 50                    Texas             US 2020-05-22 02:36:57  31.0545
## 51                     Utah             US 2020-05-22 02:36:57  40.1500
## 52                  Vermont             US 2020-05-22 02:36:57  44.0459
## 53           Virgin Islands             US 2020-05-22 02:36:57  18.3358
## 54                 Virginia             US 2020-05-22 02:36:57  37.7693
## 55               Washington             US 2020-05-22 02:36:57  47.4009
## 56            West Virginia             US 2020-05-22 02:36:57  38.4912
## 57                Wisconsin             US 2020-05-22 02:36:57  44.2685
## 58                  Wyoming             US 2020-05-22 02:36:57  42.7560
##        Long_ Confirmed Deaths Recovered Active  FIPS Incident_Rate
## 1   -86.9023     13288    529        NA  12759     1     271.00752
## 2  -152.4044       401     10       356     35     2      54.81549
## 3  -170.1320         0      0        NA      0    60       0.00000
## 4  -111.4312     15348    764      3872  10712     4     210.86134
## 5   -92.3731      5458    110      3915   1433     5     180.85999
## 6  -119.6816     88031   3583        NA  84448     6     222.79435
## 7  -105.3111     23191   1310      3532  18349     8     402.70990
## 8   -72.7554     39208   3583      6264  29361     9    1099.71511
## 9   -75.5071      8386    317      4130   3939    10     861.19429
## 10        NA        49      0        NA     49 88888            NA
## 11  -77.0268      7788    412      1061   6315    11    1103.50847
## 12  -81.6868     48675   2144        NA  46531    12     226.63002
## 13  -83.6431     40663   1775        NA  38888    13     382.98371
## 14        NA       103      3        NA    100 99999            NA
## 15  144.7937       165      5       125     35    66     100.46947
## 16 -157.4983       647     17       578     52    15      45.69622
## 17 -114.4788      2506     77      1688    741    16     140.22993
## 18  -88.9861    102688   4607        NA  98081    17     810.36498
## 19  -86.2583     29936   1913        NA  28023    18     444.66765
## 20  -93.2105     16170    410      8672   7088    19     512.50844
## 21  -96.7265      8625    204       473   7948    20     296.05460
## 22  -84.6701      8286    386      2919   4981    21     185.46568
## 23  -91.8678     36504   2629     26249   7626    22     785.23591
## 24  -69.3819      1877     73      1145    659    23     139.63571
## 25  -76.8021     43531   2159      3099  38273    24     720.03480
## 26  -71.5301     90084   6148        NA  83936    25    1306.98529
## 27  -84.5361     53510   5129     28234  20147    26     535.80421
## 28  -93.9002     18200    818     12488   4894    27     322.71609
## 29  -89.6787     12222    580      7681   3961    28     410.66492
## 30  -92.2884     11689    668        NA  11021    29     190.45437
## 31 -110.4544       479     16       440     23    30      44.81754
## 32  -98.2681     11427    138        NA  11289    31     590.72336
## 33 -117.0554      7400    383       339   6678    32     240.24757
## 34  -71.5639      3935    199      1388   2348    33     289.39973
## 35  -74.5210    151586  10846     24236 116504    34    1706.62866
## 36 -106.2485      6472    294      1985   4193    35     308.65655
## 37  -74.9481    356458  28743     62826 264889    36    1832.35347
## 38  -79.8064     20512    728     11637   8147    37     195.57433
## 39  -99.7840      2229     51      1340    838    38     292.49589
## 40  145.6739        22      2        13      7    69      39.89555
## 41  -82.7649     30167   1837        NA  28330    39     258.07804
## 42  -96.9289      5680    304      4361   1015    40     143.54414
## 43 -122.0709      3817    145      1406   2266    41      90.49877
## 44  -77.2098     69252   4869        NA  64383    42     540.94719
## 45  -66.5901      2913    126        NA   2787    72      99.30429
## 46  -71.5118     13571    556      1047  11968    44    1281.05528
## 47  -80.9450      9381    416      5451   3514    45     182.20084
## 48  -99.4388      4177     48      3145    984    46     472.15933
## 49  -86.6923     18961    313     12191   6457    47     277.64705
## 50  -97.5635     53053   1460     30341  21252    48     182.96737
## 51 -111.8624      7874     92      4596   3186    49     245.60521
## 52  -72.7107       950     54       827     69    50     152.24627
## 53  -64.8963        69      6        61      2    78      64.32487
## 54  -78.1700     34137   1100      4778  28259    51     399.94053
## 55 -121.4905     19117   1044        NA  18073    53     251.04752
## 56  -80.9545      1593     70       983    540    54      88.88780
## 57  -89.6165     13885    487      8012   5386    55     238.47415
## 58 -107.3025       801     12       534    255    56     138.39958
##    People_Tested People_Hospitalized Mortality_Rate      UID ISO3 Testing_Rate
## 1         170739                1528       3.981036 84000001  USA   3482.20595
## 2          39545                  NA       2.493766 84000002  USA   5405.68249
## 3            124                  NA             NA       16  ASM    222.85725
## 4         171627                1830       4.977847 84000004  USA   2357.92929
## 5          99276                 535       2.015390 84000005  USA   3289.67686
## 6        1421127                  NA       4.070157 84000006  USA   3596.67691
## 7         135611                3990       5.648743 84000008  USA   2354.87440
## 8         202747               10946       9.138441 84000009  USA   5686.69507
## 9          47542                  NA       3.780110 84000010  USA   4882.29181
## 10            NA                  NA       0.000000 84088888  USA           NA
## 11         41756                  NA       5.290190 84000011  USA   5916.55107
## 12        813929                9200       4.404725 84000012  USA   3789.64041
## 13        407731                7235       4.365148 84000013  USA   3840.20680
## 14            NA                  NA       2.912621 84099999  USA           NA
## 15          5064                  NA       3.030303      316  GUM   3083.49926
## 16         47149                  82       2.627512 84000015  USA   3330.03266
## 17         38888                 221       3.072626 84000016  USA   2176.08201
## 18        672020                  NA       4.486405 84000017  USA   5303.26304
## 19        202995                4389       6.390299 84000018  USA   3015.27624
## 20        116565                  NA       2.535560 84000019  USA   3694.52976
## 21         71203                 760       2.365217 84000020  USA   2444.05512
## 22        158672                2016       4.658460 84000021  USA   3551.55805
## 23        305381                  NA       7.201950 84000022  USA   6569.03704
## 24         37327                 235       3.889185 84000023  USA   2776.86853
## 25        220233                7485       4.959684 84000024  USA   3642.81603
## 26        501486                9040       6.824741 84000025  USA   7275.81838
## 27        454740                  NA       9.585124 84000026  USA   4553.38451
## 28        173556                2380       4.494505 84000027  USA   3077.43484
## 29        125970                1932       4.745541 84000028  USA   4232.65099
## 30        162092                  NA       5.714775 84000029  USA   2641.04117
## 31         30524                  65       3.340292 84000030  USA   2855.97196
## 32         75640                  NA       1.207666 84000031  USA   3910.24024
## 33         94382                  NA       5.175676 84000032  USA   3064.19545
## 34         52830                 385       5.057179 84000033  USA   3885.38447
## 35        544274                  NA       7.155014 84000034  USA   6127.70049
## 36        147344                1139       4.542645 84000035  USA   7026.99171
## 37       1555055               76608       8.063503 84000036  USA   7993.67787
## 38        290645                  NA       3.549142 84000037  USA   2771.19253
## 39         61279                 144       2.288022 84000038  USA   8041.20925
## 40          4089                  NA       9.090909      580  MNP   7415.13129
## 41        299078                5295       6.089435 84000039  USA   2558.60588
## 42        149595                 917       5.352113 84000040  USA   3780.54325
## 43        105224                 732       3.798795 84000041  USA   2494.79757
## 44        368906                  NA       7.030844 84000042  USA   2881.63035
## 45          2913                  NA       4.325438      630  PRI     99.30429
## 46        123367                1506       4.096971 84000044  USA  11645.41644
## 47        138238                1444       4.434495 84000045  USA   2684.90345
## 48         31301                 342       1.149150 84000046  USA   3538.19946
## 49        360583                1539       1.650757 84000047  USA   5280.03826
## 50        770241                  NA       2.751965 84000048  USA   2656.38075
## 51        182874                 647       1.168402 84000049  USA   5704.19201
## 52         25701                  NA       5.684211 84000050  USA   4118.82261
## 53          1383                  NA       8.695652      850  VIR   1289.29410
## 54        218599                6269       3.222310 84000051  USA   2561.05106
## 55        297942                3125       5.461108 84000053  USA   3912.62228
## 56         83141                  NA       4.394225 84000054  USA   4639.18417
## 57        177123                2218       3.507382 84000055  USA   3042.07828
## 58         18840                  75       1.498127 84000056  USA   3255.24096
##    Hospitalization_Rate
## 1             11.499097
## 2                    NA
## 3                    NA
## 4             11.923378
## 5              9.802125
## 6                    NA
## 7             17.204950
## 8             27.917772
## 9                    NA
## 10                   NA
## 11                   NA
## 12            18.900873
## 13            17.792588
## 14                   NA
## 15                   NA
## 16            12.673879
## 17             8.818835
## 18                   NA
## 19            14.661277
## 20                   NA
## 21             8.811594
## 22            24.330196
## 23                   NA
## 24            12.519979
## 25            17.194643
## 26            10.035078
## 27                   NA
## 28            13.076923
## 29            15.807560
## 30                   NA
## 31            13.569937
## 32                   NA
## 33                   NA
## 34             9.783990
## 35                   NA
## 36            17.598888
## 37            21.491452
## 38                   NA
## 39             6.460296
## 40                   NA
## 41            17.552292
## 42            16.144366
## 43            19.177364
## 44                   NA
## 45                   NA
## 46            11.097193
## 47            15.392815
## 48             8.187695
## 49             8.116661
## 50                   NA
## 51             8.216916
## 52                   NA
## 53                   NA
## 54            18.364238
## 55            16.346707
## 56                   NA
## 57            15.974073
## 58             9.363296
#UScovid_dataset$fips <- fips(brew_count_by_state$state)
# Order the rows by FIPS code. The column is referenced explicitly instead of
# via attach()/detach(), which alters the search path and can leave it
# modified if the script stops between the two calls.
UScovid_dataset_fips <- UScovid_dataset[order(UScovid_dataset$FIPS), ]

# Duplicate the code under a lower-case `fips` name for the mapping calls
# below (presumably the column name plot_usmap() looks up; confirm against
# the usmap documentation).
UScovid_dataset_fips$fips <- UScovid_dataset_fips$FIPS

# Draw a US map shaded by the `Deaths` column of the supplied rows.
#
#   data   data frame passed to plot_usmap(); must contain `Deaths` and the
#          `fips` column added above.
#   title  plot title, so each variant can say which states were removed.
#
# Returns the ggplot object; calling it at top level prints the map.
plot_deaths_map <- function(data, title) {
  map_blue <- rgb(.2, .7, 1)
  plot_usmap(data = data, values = "Deaths", color = map_blue) +
    labs(title = title, subtitle = "Count of Covid19 Deaths per state") +
    scale_fill_continuous(
      low = "white",
      high = map_blue,
      name = "Deaths per state",
      # Spelled out in full; `label` only worked through partial matching.
      labels = scales::comma
    ) +
    theme(legend.position = "right")
}

plot_deaths_map(UScovid_dataset_fips, "Covid Deaths by State")

plot_deaths_map(
  dplyr::filter(UScovid_dataset_fips, Province_State != "New York"),
  "Covid Deaths by State (New York Removed)"
)

# This map drops New Jersey as well, so the title says so (it previously
# repeated the "New York Removed" title).
plot_deaths_map(
  dplyr::filter(
    UScovid_dataset_fips,
    Province_State != "New York",
    Province_State != "New Jersey"
  ),
  "Covid Deaths by State (New York and New Jersey Removed)"
)

Previous Work - Finding the top Countries by Confirmed Cases

# Sum confirmed cases within each country, largest total first.
confirmed_by_country <- covid_raw %>%
  group_by(Country_Region) %>%
  tally(wt = Confirmed, sort = TRUE, name = "Confirmed")
confirmed_by_country
## # A tibble: 1 x 2
##   Country_Region Confirmed
##   <fct>              <int>
## 1 US               1577147
# Sum deaths within each country, largest total first.
deaths_by_country <- covid_raw %>%
  group_by(Country_Region) %>%
  tally(wt = Deaths, sort = TRUE, name = "Deaths")
deaths_by_country
## # A tibble: 1 x 2
##   Country_Region Deaths
##   <fct>           <int>
## 1 US              94702
# Combine the two per-country totals into one table keyed on the country.
totals <- merge(
  x = confirmed_by_country,
  y = deaths_by_country,
  by = "Country_Region"
)
totals
##   Country_Region Confirmed Deaths
## 1             US   1577147  94702

Then reordered by Confirmed

# Rank countries from most to fewest confirmed cases.
top_to_least <- totals[order(totals[["Confirmed"]], decreasing = TRUE), ]

top_to_least
##   Country_Region Confirmed Deaths
## 1             US   1577147  94702
# Deaths against confirmed cases for the ten highest-ranked countries.
top10Confirmed <- ggplot(
  data = head(top_to_least, 10),
  mapping = aes(x = Deaths, y = Confirmed, fill = Country_Region)
) +
  geom_point() +
  ggtitle("Deaths vs Confirmed Cases in Top countries")

ggplotly(top10Confirmed)
# At the time, China was the highest and I wanted to look at the rest, now it is much different
top10ConfirmedMinusChina <- ggplot(
  data = head(subset(top_to_least, Country_Region != "China"), 10),
  mapping = aes(x = Deaths, y = Confirmed, fill = Country_Region)
) +
  geom_point() +
  ggtitle("Deaths vs Confirmed Cases in Top Countries (Minus China)")
ggplotly(top10ConfirmedMinusChina)
# Now removing US instead
# NOTE(review): with a US-only input file this subset has no rows left.
top10ConfirmedMinusUS <- ggplot(
  data = head(subset(top_to_least, Country_Region != "US"), 10),
  mapping = aes(x = Deaths, y = Confirmed, fill = Country_Region)
) +
  geom_point() +
  ggtitle("Deaths vs Confirmed Cases in Top Countries (Minus US)")
ggplotly(top10ConfirmedMinusUS)